clear
clear all
cap log close

log using "log-files/2_Figure_B1.smcl", replace smcl

timer on 2

*------------*
* Figure B.1 *
*------------*
	* Producing estimates: this list refers to what to do with the 377
	* obs with missing transmission dates
	foreach c in MAX MIN MEDIAN DROPPED NOTRIMMING {
		* Master data
		use "data/outputs/computo_ulttranstrepdate_missingsatmunmedian.dta", clear

		* Switch trep date from global max to missing
		replace trep_date_ulttrans = . if trep_missing_date == 1

		* Replace or drop missing dates
		if "`c'" == "MIN" {
			bys Pais Dep Prov Muni: egen min_date = min(trep_date_ulttrans)
			replace trep_date_ulttrans = min_date if trep_missing_date == 1
			drop min_date
		}
		else if "`c'" == "MAX" {
			bys Pais Dep Prov Muni: egen max_date = max(trep_date_ulttrans)
			replace trep_date_ulttrans = max_date if trep_missing_date == 1
			drop max_date
		}
		else if "`c'" == "MEDIAN" {
			bys Pais Dep Prov Muni: egen median_date = median(trep_date_ulttrans)
			replace trep_date_ulttrans = median_date if trep_missing_date == 1
			drop median_date
		}
		else if "`c'" == "DROPPED" {
			drop if trep_missing_date == 1
		}

		* Trimming: yes or no
		if "`c'" != "NOTRIMMING" {
			* Trimming
			cumul trep_date_ulttrans, gen(cum_date)
			drop if cum_date <= 0.02 | cum_date >= 0.98
			drop cum_date

			local filename = "semipar_WITH_trimming_missings`c'"
		}
		else {
			* Replacing missing dates at the maximum of the municipality
			bys Pais Dep Prov Muni: egen max_date = max(trep_date_ulttrans)
			replace trep_date_ulttrans = max_date if trep_missing_date == 1
			drop max_date	

			local filename = "semipar_WITHOUT_trimming_missingsMAX"
		}

		* Margin
		gen mmargin_nbnn_so = mshare_nbnn_so - cshare_nbnn_so

		* Without this, Stata can't calculate the right ROT bandwidth
		su trep_date_ulttrans, d
		local global_mean = r(mean)
		local global_sd = r(sd)
		gen trep_date_ulttrans_norm = (trep_date_ulttrans - `global_mean') / `global_sd'

		* Restrict to regression sample
		quietly reg mmargin_nbnn_so $controls_educ trep_date_ulttrans_norm
		keep if e(sample)

		* Binned data using binsreg and all booths
		binsreg mmargin_nbnn_so trep_date_ulttrans_norm, ///
			savedata(temp) replace

		* Tack binned data output on to the end of the main data set	
		gen dots_binid = _n
		merge 1:1 dots_binid using "temp.dta"
		drop _merge
		erase "temp.dta"

		* Semiparametric Robinson estimate
		semipar mmargin_nbnn_so $controls_educ, ///
			nonpar(trep_date_ulttrans_norm) nograph generate(yhat) partial(yhat_parout1)

		* Optimal bins for the partialled-out DV 	
		binsreg yhat_parout1 trep_date_ulttrans_norm, ///
			savedata(temp) replace

		* Rename to make room for the new bin locations and binned means
		drop dots_isknot
		rename dots_x dots_x_nocontrols
		rename dots_fit dots_fit_nocontrols

		* Merge new bin locations and binned means
		merge 1:1 dots_binid using "temp.dta"
		drop _merge
		erase "temp.dta"

		* lp robust: partialed out
		lprobust yhat_parout1 trep_date_ulttrans_norm, ///
			genvars eval(dots_x)

		renvars lprobust_eval - lprobust_CI_r_rb, postfix(_controls)	
			
		* lp robust: no controls	
		lprobust mmargin_nbnn_so trep_date_ulttrans_norm, ///
			genvars eval(dots_x_nocontrols)

		compress
		saveold "data/outputs//`filename'.dta", replace
	}

	* Figure B.1.a
	*-------------
		use "data/outputs/semipar_WITHOUT_trimming_missingsMAX.dta", clear
		
		* X-axis locations for labels
		quietly sum trep_date_ulttrans
		local global_mean = r(mean)
		local global_sd = r(sd)
		local x1 = (tc(20oct2019 17:30:00) - `global_mean') / `global_sd'
		local x2 = (tc(20oct2019 18:30:00) - `global_mean') / `global_sd'
		local x3 = (tc(20oct2019 19:30:00) - `global_mean') / `global_sd'
		local x4 = (tc(20oct2019 20:30:00) - `global_mean') / `global_sd'

		* Plot
		twoway (scatter dots_fit_nocontrols dots_x_nocontrols, ///
				msymbol(Th) msize(vsmall) mcolor(midblue)) ///
			(line lprobust_gx_bc lprobust_eval, ///
				lcolor(midblue) lwidth(medthick)) ///
			(scatter dots_fit dots_x, ///
				msize(vsmall) mcolor(gs6)) ///
			(line lprobust_gx_bc_controls lprobust_eval_controls, ///
				lcolor(gs6) lwidth(medthick)) ///
			(line lprobust_CI_l_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)) ///
			(line lprobust_CI_r_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)) ///
			(line lprobust_CI_l_rb_controls lprobust_eval_controls, ///
				lpattern(dash) lcolor(gs6) lwidth(thin)) ///
			(line lprobust_CI_r_rb_controls lprobust_eval_controls, ///
				lpattern(dash) lcolor(gs6) lwidth(thin)), ///
			legend(order(2 4) ring(0) pos(11) cols(1) lab(2 "No controls") ///
				lab(4 "Education, region, and rurality controls")) ///
			xlabel(`x1' `" "10/20" "5:30"  "p.m." "' ///
				`x2' `" "10/20" "6:30"  "p.m." "' ///
				`x3' `" "10/20" "7:30"  "p.m." "' ///
				`x4' `" "10/20" "8:30"  "p.m." "', labsize(small)) ///
			graphregion(color(white)) ///
			xsize(7) ysize(5.5) ///
			ylabel(, angle(0) nogrid)  ///
			ytitle("MAS Margin Over CC") ///
			xtitle("Last Transmission Time Stamp") ///
			title("")
		graph export "outputs/generated/Figure_B1a.pdf", replace

	* Figure B.1.b
	*-------------
		use "data/outputs/semipar_WITH_trimming_missingsDROPPED.dta", clear
		
		* X-axis locations for labels
		quietly sum trep_date_ulttrans
		local global_mean = r(mean)
		local global_sd = r(sd)
		local x1 = (tc(20oct2019 17:30:00) - `global_mean') / `global_sd'
		local x2 = (tc(20oct2019 18:30:00) - `global_mean') / `global_sd'
		local x3 = (tc(20oct2019 19:30:00) - `global_mean') / `global_sd'
		local x4 = (tc(20oct2019 20:30:00) - `global_mean') / `global_sd'

		* Plot
		twoway (scatter dots_fit_nocontrols dots_x_nocontrols, ///
				msymbol(Th) msize(vsmall) mcolor(midblue)) ///
			(line lprobust_gx_bc lprobust_eval, ///
				lcolor(midblue) lwidth(medthick)) ///
			(scatter dots_fit dots_x, ///
				msize(vsmall) mcolor(gs6)) ///
			(line lprobust_gx_bc_controls lprobust_eval_controls, ///
				lcolor(gs6) lwidth(medthick)) ///
			(line lprobust_CI_l_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)) ///
			(line lprobust_CI_r_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)) ///
			(line lprobust_CI_l_rb_controls lprobust_eval_controls, ///
				lpattern(dash) lcolor(gs6) lwidth(thin)) ///
			(line lprobust_CI_r_rb_controls lprobust_eval_controls, ///
				lpattern(dash) lcolor(gs6) lwidth(thin)), ///
			legend(order(2 4) ring(0) pos(11) cols(1) lab(2 "No controls") ///
				lab(4 "Education, region, and rurality controls")) ///
			xlabel(`x1' `" "10/20" "5:30"  "p.m." "' ///
				`x2' `" "10/20" "6:30"  "p.m." "' ///
				`x3' `" "10/20" "7:30"  "p.m." "' ///
				`x4' `" "10/20" "8:30"  "p.m." "', labsize(small)) ///
			graphregion(color(white)) ///
			xsize(7) ysize(5.5) ///
			ylabel(, angle(0) nogrid)  ///
			ytitle("MAS Margin Over CC") ///
			xtitle("Last Transmission Time Stamp") ///
			title("")
		graph export "outputs/generated/Figure_B1b.pdf", replace

	* Figure B.1.c
	*-------------
		use "data/outputs/semipar_WITH_trimming_missingsMIN.dta", clear
		
		* X-axis locations for labels
		quietly sum trep_date_ulttrans
		local global_mean = r(mean)
		local global_sd = r(sd)
		local x1 = (tc(20oct2019 17:30:00) - `global_mean') / `global_sd'
		local x2 = (tc(20oct2019 18:30:00) - `global_mean') / `global_sd'
		local x3 = (tc(20oct2019 19:30:00) - `global_mean') / `global_sd'
		local x4 = (tc(20oct2019 20:30:00) - `global_mean') / `global_sd'

		* Plot
		twoway (scatter dots_fit_nocontrols dots_x_nocontrols, ///
				msymbol(Th) msize(vsmall) mcolor(midblue)) ///
			(line lprobust_gx_bc lprobust_eval, ///
				lcolor(midblue) lwidth(medthick)) ///
			(scatter dots_fit dots_x, ///
				msize(vsmall) mcolor(gs6)) ///
			(line lprobust_gx_bc_controls lprobust_eval_controls, ///
				lcolor(gs6) lwidth(medthick)) ///
			(line lprobust_CI_l_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)) ///
			(line lprobust_CI_r_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)) ///
			(line lprobust_CI_l_rb_controls lprobust_eval_controls, ///
				lpattern(dash) lcolor(gs6) lwidth(thin)) ///
			(line lprobust_CI_r_rb_controls lprobust_eval_controls, ///
				lpattern(dash) lcolor(gs6) lwidth(thin)), ///
			legend(order(2 4) ring(0) pos(11) cols(1) lab(2 "No controls") ///
				lab(4 "Education, region, and rurality controls")) ///
			xlabel(`x1' `" "10/20" "5:30"  "p.m." "' ///
				`x2' `" "10/20" "6:30"  "p.m." "' ///
				`x3' `" "10/20" "7:30"  "p.m." "' ///
				`x4' `" "10/20" "8:30"  "p.m." "', labsize(small)) ///
			graphregion(color(white)) ///
			xsize(7) ysize(5.5) ///
			ylabel(, angle(0) nogrid)  ///
			ytitle("MAS Margin Over CC") ///
			xtitle("Last Transmission Time Stamp") ///
			title("")
		graph export "outputs/generated/Figure_B1c.pdf", replace

	* Figure B.1.d
	*-------------
		use "data/outputs/semipar_WITH_trimming_missingsMAX.dta", clear
		
		* X-axis locations for labels
		quietly sum trep_date_ulttrans
		local global_mean = r(mean)
		local global_sd = r(sd)
		local x1 = (tc(20oct2019 17:30:00) - `global_mean') / `global_sd'
		local x2 = (tc(20oct2019 18:30:00) - `global_mean') / `global_sd'
		local x3 = (tc(20oct2019 19:30:00) - `global_mean') / `global_sd'
		local x4 = (tc(20oct2019 20:30:00) - `global_mean') / `global_sd'

		* Plot
		twoway (scatter dots_fit_nocontrols dots_x_nocontrols, ///
				msymbol(Th) msize(vsmall) mcolor(midblue)) ///
			(line lprobust_gx_bc lprobust_eval, ///
				lcolor(midblue) lwidth(medthick)) ///
			(scatter dots_fit dots_x, ///
				msize(vsmall) mcolor(gs6)) ///
			(line lprobust_gx_bc_controls lprobust_eval_controls, ///
				lcolor(gs6) lwidth(medthick)) ///
			(line lprobust_CI_l_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)) ///
			(line lprobust_CI_r_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)) ///
			(line lprobust_CI_l_rb_controls lprobust_eval_controls, ///
				lpattern(dash) lcolor(gs6) lwidth(thin)) ///
			(line lprobust_CI_r_rb_controls lprobust_eval_controls, ///
				lpattern(dash) lcolor(gs6) lwidth(thin)), ///
			legend(order(2 4) ring(0) pos(11) cols(1) lab(2 "No controls") ///
				lab(4 "Education, region, and rurality controls")) ///
			xlabel(`x1' `" "10/20" "5:30"  "p.m." "' ///
				`x2' `" "10/20" "6:30"  "p.m." "' ///
				`x3' `" "10/20" "7:30"  "p.m." "' ///
				`x4' `" "10/20" "8:30"  "p.m." "', labsize(small)) ///
			graphregion(color(white)) ///
			xsize(7) ysize(5.5) ///
			ylabel(, angle(0) nogrid)  ///
			ytitle("MAS Margin Over CC") ///
			xtitle("Last Transmission Time Stamp") ///
			title("")
		graph export "outputs/generated/Figure_B1d.pdf", replace

	* Figure B.1.e
	*-------------
		use "data/outputs/computo_ulttranstrepdate_missingsatmunmedian.dta", clear

		* Switch trep date from global max to missing
		replace trep_date_ulttrans = . if trep_missing_date == 1

		* Replace missing dates with max in the municipio 
		bys Pais Dep Prov Muni: egen max_date = max(trep_date_ulttrans)
		replace trep_date_ulttrans = max_date if trep_missing_date == 1
		drop max_date	

		drop total* pcs* mshare* cshare*
		gen total_nbnn = CC + FPV + MTS + UCS + MAS + F + PDC + MNR + PANBOL
		*---*
		gen mshare_nbnn_so = MAS / total_nbnn
		gen cshare_nbnn_so = CC / total_nbnn
		gen x = runiform()
		sort trep_date_ulttrans x
		drop x
		gen cumsumtotal_nbnn = sum(total_nbnn)
		*---*
		egen maxcumsumtotal_nbnn = max(cumsumtotal_nbnn)
		gen pcs_nbnn_so = cumsumtotal_nbnn / maxcumsumtotal_nbnn
		drop cumsumtotal_nbnn maxcumsumtotal_nbnn

		* Trimming
		cumul trep_date_ulttrans, gen(cum_date)
		drop if cum_date <= 0.02 | cum_date >= 0.98
		drop cum_date

		gen mmargin_nbnn_so = mshare_nbnn_so - cshare_nbnn_so

		* Without this, Stata can't calculate the right ROT bandwidth
		su trep_date_ulttrans, d
		local global_mean = r(mean)
		local global_sd = r(sd)
		gen trep_date_ulttrans_norm = (trep_date_ulttrans - `global_mean') / `global_sd'

		* Binned data using binsreg
		binsreg mmargin_nbnn_so trep_date_ulttrans_norm, ///
			savedata(temp) replace

		gen dots_binid = _n
		merge 1:1 dots_binid using "temp.dta"
		drop _merge
		erase "temp.dta"

		* lp robust: partialed out
		lprobust mmargin_nbnn_so trep_date_ulttrans_norm, ///
			genvars eval(dots_x)

		local x1 = (tc(20oct2019 17:30:00) - `global_mean') / `global_sd'
		local x2 = (tc(20oct2019 18:30:00) - `global_mean') / `global_sd'
		local x3 = (tc(20oct2019 19:30:00) - `global_mean') / `global_sd'
		local x4 = (tc(20oct2019 20:30:00) - `global_mean') / `global_sd'

		local k=1
		foreach vl in 0.47 0.57 0.87 0.97 {
			gen dif = abs(pcs_nbnn_so - `vl')

			sort dif
			quietly sum trep_date_ulttrans_norm if _n == 1
			local xline_`k' = r(mean)

			local k = `k' + 1
			drop dif
		}

		twoway (scatter dots_fit dots_x, msize(vsmall) mcolor(midblue)) ///
			(line lprobust_gx_bc lprobust_eval, ///
				lcolor(midblue) lwidth(medthick)) ///
			(line lprobust_CI_l_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)) ///
			(line lprobust_CI_r_rb lprobust_eval, ///
				lpattern(dash) lcolor(midblue) lwidth(thin)), ///
			xlabel(`x1' `" "10/20" "5:30"  "p.m." "' ///
				`x2' `" "10/20" "6:30"  "p.m." "' ///
				`x3' `" "10/20" "7:30"  "p.m." "' ///
				`x4' `" "10/20" "8:30"  "p.m." "', labsize(small)) ///
			xline(`xline_1' `xline_2' `xline_3' `xline_4', ///
				lcolor(black) lpattern(dot)) ///
			text(0.37 `=`xline_1'-0.18' "0.47" ///
				0.37 `=`xline_2'+0.18' "0.57" ///
				0.37 `=`xline_3'-0.18' "0.87" ///
				0.37 `=`xline_4'-0.18' "0.97", size(tiny) color(gs6)) ///
			graphregion(color(white)) ///
			xsize(7) ysize(5.5) ///
			ylabel(, angle(0) nogrid) ///
			ytitle("MAS Margin Over CC") ///
			xtitle("Last Transmission Time Stamp") ///
			legend(order(2) lab(2 "Non-parametric Fit")) legend(off)
		graph export "outputs/generated/Figure_B1e.pdf", replace

*------------------------------------------------------------------------------*
timer off 2
timer list 2

log close
clear all
